/***
This do-file creates the CPS national series used in our analysis. We process 
the CPS data in the most analogous way possible to our processing in the 
employment pipeline. 
***/

*-------------------------------------------------------------------------------
* Set up
*-------------------------------------------------------------------------------

* Set $root via the -project- build tool (project name: figstabs).
* NOTE(review): the r(buildrunning) check below presumably distinguishes an
* interactive run from a full project build -- confirm against the -project- docs.
* When it equals 0, the interactive configuration file is included.
project figstabs, root
if (r(buildrunning)==0) include "${root}/code/config_interactive.do"

* Set globals: declare set_globals.do as an input of this do-file, then
* include it so its definitions are available in the current scope
project, uses("${root}/code/set_globals.do")
include "${root}/code/set_globals.do"

* Create the output directory for the derived CPS series. -cap- suppresses any
* mkdir error (for example, when the directory already exists).
cap mkdir "${root}/data/derived/CPS"

*-------------------------------------------------------------------------------
* Prepare CPS count of job holders  
*-------------------------------------------------------------------------------

* Open CPS data (declared as an input of this do-file for the build)
project, uses("${root}/data/dvc/CPS/cps_00037.dta") 
use "${root}/data/dvc/CPS/cps_00037.dta", clear 

* Sample restrictions: age and year must be non-missing; keep ages 16+ and
* observations from 2020 onward
assert !mi(age, year)
keep if age >= 16
keep if year > 2019

* Create NAICS: map the CPS industry code (ind) to a 2-digit NAICS sector.
* The ind ranges are mutually exclusive, so the order of the replaces is
* irrelevant. Observations whose ind falls outside every range keep naics == .
gen naics = . 
replace naics = 11 if inrange(ind, 0170, 0290)
replace naics = 21 if inrange(ind, 0370, 0490)
replace naics = 22 if inrange(ind, 0570, 0690)
replace naics = 23 if inrange(ind, 0770, 0770)
replace naics = 31 if inrange(ind, 1070, 1790)
replace naics = 32 if inrange(ind, 1870, 2590)
replace naics = 33 if inrange(ind, 2670, 3990)
replace naics = 42 if inrange(ind, 4070, 4590)
replace naics = 44 if inrange(ind, 4670, 5190)
replace naics = 45 if inrange(ind, 5275, 5790)
replace naics = 48 if inrange(ind, 6070, 6290)
replace naics = 49 if inrange(ind, 6370, 6390)
replace naics = 51 if inrange(ind, 6470, 6780)
replace naics = 52 if inrange(ind, 6870, 6992)
replace naics = 53 if inrange(ind, 7070, 7190)
replace naics = 54 if inrange(ind, 7270, 7490)
replace naics = 55 if inrange(ind, 7570, 7570)
replace naics = 56 if inrange(ind, 7580, 7790)
replace naics = 61 if inrange(ind, 7860, 7890)
replace naics = 62 if inrange(ind, 7970, 8470)
replace naics = 71 if inrange(ind, 8560, 8590)
replace naics = 72 if inrange(ind, 8660, 8690)
replace naics = 81 if inrange(ind, 8770, 9290)
replace naics = 92 if inrange(ind, 9370, 9890)
						
* Be consistent with PIE and CES series: string sector codes, with the
* multi-code sectors pooled (31/32/33 -> "3133", 44/45 -> "4445", 48/49 -> "4849").
* naics 92 gets no string code; it is dropped below.
gen naics_code = ""
replace naics_code = "11" if naics == 11
replace naics_code = "21" if naics == 21
replace naics_code = "22" if naics == 22
replace naics_code = "23" if naics == 23
replace naics_code = "3133" if naics == 31 | naics == 32 | naics == 33
replace naics_code = "42" if naics == 42
replace naics_code = "4445" if naics == 44 | naics == 45
replace naics_code = "4849" if naics == 48 | naics == 49
replace naics_code = "51" if naics == 51
replace naics_code = "52" if naics == 52
replace naics_code = "53" if naics == 53
replace naics_code = "54" if naics == 54
replace naics_code = "55" if naics == 55
replace naics_code = "56" if naics == 56
replace naics_code = "61" if naics == 61
replace naics_code = "62" if naics == 62
replace naics_code = "71" if naics == 71
replace naics_code = "72" if naics == 72
replace naics_code = "81" if naics == 81
				
* Drop some sectors according to BLS adjustment 
drop if naics == 92 		// drop those working in public sector to match CES (Total Private Employment)
drop if naics == 11 		// drop those working in agriculture, forestry, fishing, and hunting according to BLS adjustment of CPS to CES
* Private households are identified by the industry code itself: ind 9290 is
* folded into naics 81 above and naics only holds 2-digit values, so the test
* must be on ind (a test of naics against 9290 can never match).
drop if ind == 9290 		// drop workers in private households such as nannies, housekeepers, etc.
			
* Drop some classes of workers according to BLS adjustment
* NOTE(review): in the IPUMS CPS codebook classwkr 29 appears to be "unpaid
* family worker" rather than a public-sector class -- confirm the labels.
drop if inlist(classwkr, 0, 13, 25, 26, 27, 28, 29) 	// drop missing (0), unincorporated, self-employed (13), and codes 25-29 (public sector employees; see note above on 29)
			
* Keep those with jobs: only empstat == 10 is retained.
* NOTE(review): presumably 10 is "at work" in IPUMS CPS, which would exclude
* people who have a job but were absent in the reference week -- confirm intended.
keep if empstat == 10
			
* Convert to super sector. The codes tested here must be the ones actually
* stored in naics_code above (pooled sectors are "3133", "4445", "4849").
gen naics_ss = ""
replace naics_ss = "10" if inlist(naics_code, "11", "21")
replace naics_ss = "20" if inlist(naics_code, "23")
replace naics_ss = "30" if inlist(naics_code, "3133")
replace naics_ss = "40" if inlist(naics_code, "42", "4445", "4849", "22")
replace naics_ss = "50" if inlist(naics_code, "51")
replace naics_ss = "55" if inlist(naics_code, "52", "53")
replace naics_ss = "60" if inlist(naics_code, "54", "55", "56")
replace naics_ss = "65" if inlist(naics_code, "61", "62")
replace naics_ss = "70" if inlist(naics_code, "71", "72")
replace naics_ss = "80" if inlist(naics_code, "81")
			
* Reformat NAICS codes (replace any hyphen with an underscore; the codes
* assigned above contain no hyphen, so this currently changes nothing)
replace naics_code = subinstr(naics_code, "-", "_", .) 
			
* Define hourly wages
* Values above the thresholds below are set to missing (presumably top codes
* or not-in-universe codes -- confirm against the codebook).
cap drop wage
replace earnweek = . if earnweek > 9999 
replace uhrswork1 = . if uhrswork1 > 996
replace hourwage = . if hourwage > 999
replace hourwage = earnweek / uhrswork1 if mi(hourwage) & paidhour == 2  		// if paid hourly, divide weekly earnings by amount of hours usually worked
gen wage = hourwage if paidhour == 2
replace wage = earnweek / uhrswork1 if paidhour == 1

* Winsorize wages to [5, 100]. Missing wages stay missing: the upper cap
* excludes them explicitly and missing is never less than 5 in Stata.
replace wage = 100 if wage > 100 & !mi(wage)
replace wage = 5 if wage < 5

* Collapse to one weighted head count per month (dated the 15th), using
* wtfinl as the person weight
gen date = mdy(month, 15, year)
format date %td
gen id = 1 
collapse (sum) employment_cps = id [pw=wtfinl], by(date) fast 	

* Save CPS people employed  
tempfile cps_people_employed 
save `cps_people_employed'

*-------------------------------------------------------------------------------
* Prepare CPS count of multiple job holders  
*-------------------------------------------------------------------------------

* Load the unadjusted multiple-job-holder series (declared as a build input)
project, uses("${root}/data/dvc/St Louis Fed/unadjusted_multiple_job_holders_LNU02026619.csv")
import delimited "${root}/data/dvc/St Louis Fed/unadjusted_multiple_job_holders_LNU02026619.csv", clear

* Reorganize: give the series its analysis name and multiply by 1,000
* (presumably the raw series is reported in thousands of persons -- confirm)
rename value employment_cps_multiple
replace employment_cps_multiple = 1000 * employment_cps_multiple 

* Create date: characters 6-8 of label hold a three-letter month abbreviation;
* translate it into a month number by walking the abbreviations in calendar
* order. Labels that match none of them leave month missing.
gen month = .
local m = 0
foreach abbr in Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec {
	local ++m
	replace month = `m' if substr(label, 6, 3) == "`abbr'"
}

* Date each observation at the 15th of its month, matching the CPS job-holder series
drop label
gen date = mdy(month, 15, year)
format date %td

*-------------------------------------------------------------------------------
* Prepare CPS total   
*-------------------------------------------------------------------------------

* Merge together: attach the monthly CPS job-holder counts and keep only the
* months found in both series
merge 1:1 date using `cps_people_employed', keep(match) nogenerate

* Create total employment (jobs rather than people): every multiple job holder
* contributes one extra job for the 92% share and two extra jobs for the 8%
* share with more than two jobs
	// (source: http://ftp.iza.org/dp10355.pdf)
replace employment_cps = employment_cps ///
	+ employment_cps_multiple * 0.92 ///
	+ employment_cps_multiple * 2 * 0.08

* Create normed version: percent change relative to the 15 January 2020 level.
* The January value is spread to every row by taking the mean of a variable
* that is non-missing only in that month.
gen emp_jan2020 = employment_cps if date == td(15jan2020)
gegen emp_base = mean(emp_jan2020)
gen norm_emp_cps = (employment_cps/emp_base - 1) * 100

* Retain only the output variables (this also discards the scratch variables)
keep date norm_emp_cps employment_cps

* Save and declare the file as an output of this do-file
save "${root}/data/derived/CPS/CPS National Monthly.dta", replace
project, creates("${root}/data/derived/CPS/CPS National Monthly.dta")
